PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_boardgames_July2025.csv"
OUT=price_changes_sellertype
DBMS=csv REPLACE;
run;
PROC IMPORT 
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_boardgames_July2025.csv"
out = SELLER_TYPE_TREND1
DBMS=csv REPLACE;
run;
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_boardgames_July2025.csv" 
out=final_outputvector
DBMS=csv REPLACE;
run;


/***subsetting for the relevant analysis SKUs***/

data relevantSKUsonly;
set price_changes_sellertype;
where ASIN in ('B00UB25IJA', 'B018RFIVYG', 'B00UD6EXIQ') ; 
run;

data relevantSKUs;
set price_changes_sellertype;
where ASIN in ('B00UB25IJA', 'B018RFIVYG', 'B00UD6EXIQ') and seller_type in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4'); 
run;

data relevantSKUs_used;
set price_changes_sellertype;
where ASIN in ('B00UB25IJA', 'B018RFIVYG', 'B00UD6EXIQ') and seller_type not in ('Amazon New', 'Cluster 1', 'Cluster 2',
'Cluster 3', 'Cluster 4'); 
run;




PROC SQL;
CREATE TABLE numsellers_newoffers AS
SELECT
TIME1,
ASIN,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUs
group by
time1,
ASIN;
quit;

PROC SQL;
CREATE TABLE numsellersbyeachtype_newoffers AS
SELECT
TIME1,
ASIN,
seller_type,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUsonly
group by
time1,
ASIN,
seller_type;
quit;

/**creating num sellers DV**/
proc sort data = numsellers_newoffers (keep=time1) nodupkey out= final_numsellers_outputvector;
by time1;
run;

%macro numsellers(b,c);
data numsellers&b. (keep = time1 NUM_SELLERS_&b.);
set numsellers_newoffers;
where ASIN = &c.;
rename NUM_SELLERS = NUM_SELLERS_&b. ;
run;

data final_numsellers_outputvector ;
merge final_numsellers_outputvector (in=a) numsellers&b. (in=b);
by time1;
if a ;
if a and not b then NUM_SELLERS_&b. = 0;
run;
%mend;
%numsellers(b = USO1, c = 'B00UB25IJA'); 
%numsellers(b = FFLite, c = 'B018RFIVYG'); 
%numsellers(b = USO2, c = 'B00UD6EXIQ'); 

PROC EXPORT 
DATA=final_numsellers_outputvector
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\numsellers_dv2outcomeboardgame_July2025.csv"
REPLACE;
run;


/***covariate creation**/

proc sort data = SELLER_TYPE_TREND1 out = SELLER_TYPE_TREND11;
by time1;
run;
data SELLER_TYPE_TREND2;
set SELLER_TYPE_TREND11;
if seller_type in ('3P Old Focal', '3P Old Non-Focal') then seller_type = '3P Old';
else if seller_type in ('Amazon Old Focal', 'Amazon Old Non-Focal') then seller_type = 'Amazon Old';
else seller_type = seller_type;
run;

/**seller characteristics trend***/
proc freq data = SELLER_TYPE_TREND2;
tables seller_type;
run;


/*subset for 3p new sellers*/
data SELLER_TYPE_TREND3;
set SELLER_TYPE_TREND2;
where seller_type not in ('Amazon Ne', '3P Old', 'Amazon Ol');
run;
proc freq data = SELLER_TYPE_TREND3;
tables seller_type;
run;


PROC SQL;
CREATE TABLE SELLER_charcs_byasin AS
SELECT
TIME1,
ASIN,
SELLER_TYPE,
mean(FBA) as mean_FBA,
/*std(FBA) as sd_FBA,*/
mean(Freeship) as mean_freeship,
/*std(Freeship) as sd_freeship,*/
mean(Prime) as mean_prime,
/*std(Prime) as sd_prime,*/
sum(condition) as new_offers,
mean(num_seller_Posrating) as mean_posrating,
/*std(num_seller_Posrating) as sd_posrating,*/
mean(num_seller_ratings) as mean_numratings,
/*std(num_seller_ratings) as sd_numratings,*/
mean(Seller_Rating) as mean_starrate
/*std(Seller_Rating) as sd_starrate*/
from 
SELLER_TYPE_TREND3
group by
time1,
ASIN,
seller_type;
quit;

/**converting the data by time, one row per date**/

proc contents data = SELLER_charcs_byasin;
run;

proc sort data = SELLER_charcs_byasin (keep=time1) nodupkey out= SELLER_charcs_byasin2;
by time1;
run;


%macro sellerchar(b,c);
data SELLER_charcs_byasin&b. (keep = time1 mean_FBA&b. 
mean_freeship&b. mean_numratings&b.   
mean_posrating&b. 
mean_prime&b.   
mean_starrate&b.
new_offers&b.);
set SELLER_charcs_byasin;
where ASIN = &c. ;
rename mean_FBA = mean_FBA&b.;
rename mean_freeship = mean_freeship&b.;  
rename mean_numratings = mean_numratings&b.;    
rename mean_posrating =  mean_posrating&b.;    
rename mean_prime =  mean_prime&b.;    
rename mean_starrate = mean_starrate&b.;
rename new_offers = new_offers&b.;
run;

data SELLER_charcs_byasin2 ;
merge SELLER_charcs_byasin2 (in=a) SELLER_charcs_byasin&b. (in=b);
by time1;
if a ;
if a and not b then do;
mean_FBA&b. = 0;
mean_freeship&b. = 0;
mean_posrating&b. = 0;
mean_prime&b. =0;
mean_starrate&b. = 0;
new_offers&b.=0;
end;
run;
%mend;
%sellerchar(b = USO2_3p, c = 'B00UB25IJA'); 
%sellerchar(b = Fflite_3p, c = 'B018RFIVYG'); 
%sellerchar(b = USO1_3p, c = 'B00UD6EXIQ');


/**coding all missings with 0 values**/

proc contents data = SELLER_charcs_byasin2;
run;

data seller_characs_byasin3;
set SELLER_charcs_byasin2;
if	mean_FBAFflite_3p	=. Then	mean_FBAFflite_3p	=0;
if	mean_FBAUSO1_3p	=. Then	mean_FBAUSO1_3p	=0;
if	mean_FBAUSO2_3p	=. Then	mean_FBAUSO2_3p	=0;
if	mean_freeshipFflite_3p	=. Then	mean_freeshipFflite_3p	=0;
if	mean_freeshipUSO1_3p	=. Then	mean_freeshipUSO1_3p	=0;
if	mean_freeshipUSO2_3p	=. Then	mean_freeshipUSO2_3p	=0;
if	mean_numratingsFflite_3p	=. Then	mean_numratingsFflite_3p	=0;
if	mean_numratingsUSO1_3p	=. Then	mean_numratingsUSO1_3p	=0;
if	mean_numratingsUSO2_3p	=. Then	mean_numratingsUSO2_3p	=0;
if	mean_posratingFflite_3p	=. Then	mean_posratingFflite_3p	=0;
if	mean_posratingUSO1_3p	=. Then	mean_posratingUSO1_3p	=0;
if	mean_posratingUSO2_3p	=. Then	mean_posratingUSO2_3p	=0;
if	mean_primeFflite_3p	=. Then	mean_primeFflite_3p	=0;
if	mean_primeUSO1_3p	=. Then	mean_primeUSO1_3p	=0;
if	mean_primeUSO2_3p	=. Then	mean_primeUSO2_3p	=0;
if	mean_starrateFflite_3p	=. Then	mean_starrateFflite_3p	=0;
if	mean_starrateUSO1_3p	=. Then	mean_starrateUSO1_3p	=0;
if	mean_starrateUSO2_3p	=. Then	mean_starrateUSO2_3p	=0;
if	new_offersFflite_3p	=. Then	new_offersFflite_3p	=0;
if	new_offersUSO1_3p	=. Then	new_offersUSO1_3p	=0;
if	new_offersUSO2_3p	=. Then	new_offersUSO2_3p	=0;

run;



proc sort data = seller_characs_byasin3 nodupkey out=seller_characs_byasin4 ;
by  TIME1 ;
run;


proc expand data=seller_characs_byasin4 out=seller_characs_byasin5 method = none; 
convert	mean_FBAFflite_3p	=	mean_FBAFflite_3p_lag	/transformout = (lag 1);
convert	mean_FBAUSO1_3p	=	mean_FBAUSO1_3p_lag	/transformout = (lag 1);
convert	mean_FBAUSO2_3p	=	mean_FBAUSO2_3p_lag	/transformout = (lag 1);
convert	mean_freeshipFflite_3p	=	mean_freeshipFflite_3p_lag	/transformout = (lag 1);
convert	mean_freeshipUSO1_3p	=	mean_freeshipUSO1_3p_lag	/transformout = (lag 1);
convert	mean_freeshipUSO2_3p	=	mean_freeshipUSO2_3p_lag	/transformout = (lag 1);
convert	mean_numratingsFflite_3p	=	mean_numratingsFflite_3p_lag	/transformout = (lag 1);
convert	mean_numratingsUSO1_3p	=	mean_numratingsUSO1_3p_lag	/transformout = (lag 1);
convert	mean_numratingsUSO2_3p	=	mean_numratingsUSO2_3p_lag	/transformout = (lag 1);
convert	mean_posratingFflite_3p	=	mean_posratingFflite_3p_lag	/transformout = (lag 1);
convert	mean_posratingUSO1_3p	=	mean_posratingUSO1_3p_lag	/transformout = (lag 1);
convert	mean_posratingUSO2_3p	=	mean_posratingUSO2_3p_lag	/transformout = (lag 1);
convert	mean_primeFflite_3p	=	mean_primeFflite_3p_lag	/transformout = (lag 1);
convert	mean_primeUSO1_3p	=	mean_primeUSO1_3p_lag	/transformout = (lag 1);
convert	mean_primeUSO2_3p	=	mean_primeUSO2_3p_lag	/transformout = (lag 1);
convert	mean_starrateFflite_3p	=	mean_starrateFflite_3p_lag	/transformout = (lag 1);
convert	mean_starrateUSO1_3p	=	mean_starrateUSO1_3p_lag	/transformout = (lag 1);
convert	mean_starrateUSO2_3p	=	mean_starrateUSO2_3p_lag	/transformout = (lag 1);
convert	new_offersFflite_3p	=	new_offersFflite_3p_lag	/transformout = (lag 1);
convert	new_offersUSO1_3p	=	new_offersUSO1_3p_lag	/transformout = (lag 1);
convert	new_offersUSO2_3p	=	new_offersUSO2_3p_lag	/transformout = (lag 1);

run;


/*****REMAINING COVARIATES ARE FROM THE ORIGINAL DV - FINAL DATA SET*****/

PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Board Games\final_gam_modeldata_JMRnd4_July2025.csv" 
out=final_gam_modeldata
DBMS=csv REPLACE;
run;



proc sort data = final_numsellers_outputvector out = sorted_finalnumsellersoutput;
by time1;
run;

proc sort data = final_gam_modeldata out = sorted_final_GAMMODELDATA;
by time1;
run;


proc sort data = seller_characs_byasin5 out = sorted_seller_characs_byasin5;
by time1;
run;


data finalnumsellers_modeldata;
merge sorted_finalnumsellersoutput (in=a) sorted_final_GAMMODELDATA (in=b) sorted_seller_characs_byasin5 (in=c);
by time1;
if a ;
run;

proc print data = finalnumsellers_modeldata (obs=20);
run;


/**Adding special Amazon holidays***/

/*Valentine's day sale: Feb 5th - Feb 14th 

Easter day sale: March 19-March 26 (2018)

Mothers day: May 13 (2018)

Amazon prime day: JUly 16 (2018); post-prime day sale July 18 (2018)

Black Friday:
    Nov 17th - Nov 24th (2017)
    Nov 16th - Nov 23rd (2018)

Cyber Monday:
	Nov 27th (2017), Nov 26th (2018)

Amazon digital day: Dec 29 (2017), Dec 28 (2018)

12 days of deal sale: Dec 3rd - Dec 14 (2017), Dec 2nd - Dec 13 (2018)

Christmas day sale: Dec 20-Dec25

Hanukkah sale: Dec 1- Dec 7

Post Christmas sale: Dec 26- Jan 1st*/

data finalnumsellers_modeldata1;
set finalnumsellers_modeldata;
if time1 in ( '01Jan2018'd , 
              '05Feb2018'd, '06Feb2018'd,'07Feb2018'd,'08Feb2018'd,'09Feb2018'd,'10Feb2018'd,'11Feb2018'd,
              '12Feb2018'd, '13Feb2018'd,'14Feb2018'd,
			  '19Mar2018'd, '20Mar2018'd,'21Mar2018'd,'22Mar2018'd,'23Mar2018'd,'24Mar2018'd,'25Mar2018'd,'26Mar2018'd,
			  '13May2018'd, '16Jul2018'd, '18Jul2018'd, 
              '17Nov2017'd,'18Nov2017'd,'19Nov2017'd,'20Nov2017'd,'21Nov2017'd,'22Nov2017'd, '23Nov2017'd,'24Nov2017'd,'27Nov2017'd,
			  '16Nov2018'd,'17Nov2018'd,'18Nov2018'd,'19Nov2018'd,'20Nov2018'd,'21Nov2018'd,'22Nov2018'd, '23Nov2018'd,'26Nov2018'd,
              '03Dec2017'd,'04Dec2017'd,'05Dec2017'd,'06Dec2017'd,'07Dec2017'd,'08Dec2017'd,
              '09Dec2017'd,'10Dec2017'd,'11Dec2017'd,'12Dec2017'd,'13Dec2017'd,'14Dec2017'd,
			  '02Dec2018'd,'03Dec2018'd,'04Dec2018'd,'05Dec2018'd,'06Dec2018'd,'07Dec2018'd,'08Dec2018'd,
              '09Dec2018'd,'10Dec2018'd,'11Dec2018'd,'12Dec2018'd,'13Dec2018'd) then seasonal_sale = 1;
else if time1 >= '20Dec2017'd and time1 <= '31Dec2017'd then seasonal_sale = 1;
else if time1 >= '20Dec2018'd and time1 <= '31Dec2018'd then seasonal_sale = 1;
else seasonal_sale = 0;
run;

proc freq data = finalnumsellers_modeldata1;
tables season*seasonal_sale /list;
run;
proc print data = finalnumsellers_modeldata1;
var Y_clust1_uso1;
run;

PROC EXPORT 
DATA=finalnumsellers_modeldata1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Board Games\finalnumsellers_modeldataJMRnd4_July2025.csv"
REPLACE;
run;
